Set up themes and notebook-wide settings.
# set up themes and notebook-wide settings
my.alpha = .2
colors <- c("#2185c5ff","#ff9715ff","#f20253ff","#7ecefdff","#1c3aa9ff")
fr.colors <- c(colors[2], colors[3])
se <- function(x) sqrt(var(x)/length(x)) #function to calculate SE
Set global working directory.
% time chosen values are mean-centered at the dyad level.
# load friendship dyad data frame
df <- read.csv("./Paper-Analysis/data/friend-dyads.csv")
# add dyad pair column
df <- data.frame(df, stringsAsFactors = F) %>%
mutate(dyad = paste0(pmin(Target,ChoiceOption), pmax(Target,ChoiceOption), sep=""))
# ONLY include dyads who are in the same tribe (SameTribe = 1)
sim <- subset(df, SameTribe == 1)
# only include similarity, time, dyad name
sim <- sim[,c(11,7,16)]
# remove duplicate similarity rows
sim <- distinct(sim, Time, dyad, .keep_all = T)
# correlate time with similarity
corr <- sim %>%
group_by(dyad) %>%
mutate(correlation = cor(similarity,Time))
# get mean % time per PID chosen from friend choices (collapse across clips)
friends <- df %>%
group_by(PID, dyad, condType) %>%
summarize(TimeChosen = mean(DyadPercentTimeChosen))
# This pipe gives a friendly warning:
# `summarise()` has grouped output by 'PID', 'dyad'. You can override using the `.groups` argument.
# This is not an issue, it's just telling me what columns I'm grouping by before summarizing.
# combine with % time chosen as friends from friends.choices
friend.corr <- merge(corr, friends, by = "dyad")
# remove friends & sim data frames
rm(friends)
rm(sim)
# load rivalry dyad data frame
df <- read.csv("./Paper-Analysis/data/rival-dyads.csv")
# add dyad pair column
df <- data.frame(df, stringsAsFactors = F) %>%
mutate(dyad = paste0(pmin(Target,ChoiceOption), pmax(Target,ChoiceOption), sep=""))
# ONLY include dyads who are in the same tribe (SameTribe = 1)
sim <- subset(df, SameTribe == 1)
# only include similarity, time, dyad name
sim <- sim[,c(11,7,16)]
# remove duplicate similarity rows
sim <- distinct(sim, Time, dyad, .keep_all = T)
# correlate time with similarity
corr <- sim %>%
group_by(dyad) %>%
mutate(correlation = cor(similarity,Time))
# get mean % time per PID chosen from friend choices (collapse across clips)
rivals <- df %>%
group_by(PID, dyad, condType) %>%
summarize(TimeChosen = mean(DyadPercentTimeChosen))
# This pipe gives a friendly warning:
# `summarise()` has grouped output by 'PID', 'dyad'. You can override using the `.groups` argument.
# This is not an issue, it's just telling me what columns I'm grouping by before summarizing.
# combine with % time chosen as friends from friends.choices
rival.corr <- merge(corr, rivals, by = "dyad")
# remove friends & sim data frames
rm(rivals)
rm(sim)
Note: I do not mean center % time chosen because it is constrained between 0 and 1 (can’t have negative % time chosen).
# combine all correlations for friendship and rivalry
corr <- rbind(friend.corr, rival.corr)
names(corr)[7] <- "PercentChosen"
write.csv(corr, "./Paper-Analysis/data/friends_rivals_dyads.csv")
# make condType a factor
corr$condType <- as.factor(corr$condType)
# print number of similarity assessments
assessments <- corr %>%
group_by(dyad, Time, condType) %>% # include number of dyads at each time point per condType
summarize(similarityCount = mean(similarity))
numObs <- nrow(assessments)
print(paste0("Number of similarity assessments (count per condition type and time point) = ", numObs, sep = ""))
## [1] "Number of similarity assessments (count per condition type and time point) = 296"
# run overall linear regression
# how is the similarity per dyad predicted by percent chosen as friends/rivals in episode 13 and time?
# three way interaction between % time chosen, condition type (friend/rival), and time
model1.wholeSeason <- lmer(similarity ~ PercentChosen * condType * Time + (1|PID) + (1|dyad),
data = corr)
# Singularity is a confirmed non-issue resolved with Bayesian approach. See supplemental section.
# summarize model
summary(model1.wholeSeason)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: similarity ~ PercentChosen * condType * Time + (1 | PID) + (1 |
## dyad)
## Data: corr
##
## REML criterion at convergence: -23924.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1660659 -0.6079282 0.0316622 0.6800781 2.3621620
##
## Random effects:
## Groups Name Variance Std.Dev.
## PID (Intercept) 0.000000000 0.00000000
## dyad (Intercept) 0.005613796 0.07492527
## Residual 0.014020508 0.11840823
## Number of obs: 16872, groups: PID, 57; dyad, 21
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 4.794128e-01 1.854504e-02 3.253092e+01
## PercentChosen 6.900346e-02 1.415950e-02 1.685384e+04
## condTypeRival 6.303809e-02 1.179738e-02 1.685735e+04
## Time 9.839174e-03 1.096583e-03 1.686294e+04
## PercentChosen:condTypeRival -1.247946e-01 2.215991e-02 1.685861e+04
## PercentChosen:Time -1.019600e-02 1.826745e-03 1.685722e+04
## condTypeRival:Time -9.244989e-03 1.501016e-03 1.686099e+04
## PercentChosen:condTypeRival:Time 1.834147e-02 2.802509e-03 1.686228e+04
## t value Pr(>|t|)
## (Intercept) 25.85127 < 2.22e-16 ***
## PercentChosen 4.87330 1.1075e-06 ***
## condTypeRival 5.34340 9.2406e-08 ***
## Time 8.97258 < 2.22e-16 ***
## PercentChosen:condTypeRival -5.63155 1.8145e-08 ***
## PercentChosen:Time -5.58151 2.4211e-08 ***
## condTypeRival:Time -6.15915 7.4793e-10 ***
## PercentChosen:condTypeRival:Time 6.54466 6.1349e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PrcntC cndTyR Time PrC:TR PrcC:T cnTR:T
## PercentChsn -0.433
## condTypeRvl -0.359 0.733
## Time -0.424 0.814 0.656
## PrcntChs:TR 0.310 -0.710 -0.929 -0.561
## PrcntChsn:T 0.385 -0.874 -0.614 -0.921 0.599
## cndTypRvl:T 0.320 -0.625 -0.870 -0.746 0.801 0.695
## PrcntC:TR:T -0.278 0.612 0.806 0.652 -0.868 -0.695 -0.925
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# get confidence intervals
confint(model1.wholeSeason)
## 2.5 % 97.5 %
## .sig01 0.000000000000 0.001837998933
## .sig02 0.055348361656 0.102524714938
## .sigma 0.117130926891 0.119658911057
## (Intercept) 0.442761445009 0.516170900780
## PercentChosen 0.041270211884 0.096766918059
## condTypeRival 0.039933384890 0.086172233361
## Time 0.007693621152 0.011992543829
## PercentChosen:condTypeRival -0.168250827174 -0.081396720977
## PercentChosen:Time -0.013781129336 -0.006620540327
## condTypeRival:Time -0.012192325622 -0.006307984826
## PercentChosen:condTypeRival:Time 0.012858346069 0.023845202444
# get simple slopes
simple_slopes(model1.wholeSeason)
# get -1SD slope, mean slope, +1SD slope to plot similarity X time
simple_slopes(model1.wholeSeason) %>%
filter(Time == "sstest")
# plot
plot_model(model1.wholeSeason, type = "pred",
terms = c("Time","PercentChosen","condType"))
# plot_model(model1.wholeSeason, type = "int",
# mdrt.values = "meansd")
# Using summarize in this chunk gives the following messages:
# `summarise()` has grouped output by 'dyad', 'Time'. You can override using the `.groups` argument.
# This is not an issue, it is just telling me which columns I've grouped by before summarizing.
# ggpredict
model.df <- ggpredict(model1.wholeSeason, ci.lvl = 0.95,
terms = c("Time","PercentChosen","condType"))
model.df <- as.data.frame(model.df)
# plot results!
friend.df <- subset(model.df, facet == "Friend")
rival.df <- subset(model.df, facet == "Rival")
# 3 color gradient with friend and rival colors
friend.color.gradient <- c("#ffbf70",fr.colors[1],"#cc7000")
rival.color.gradient <- c("#fd4985",fr.colors[2],"#a20237")
# friend condition, x axis = time, y axis = similarity, group = percent chosen as friends
ggplot(friend.df, aes(x = x, y = predicted, color = group, fill = group)) +
geom_line(size = 1.5) +
geom_ribbon(alpha = .15,
aes(ymin = conf.low, ymax = conf.high),
linetype = .5) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 11)) +
scale_fill_manual(labels = c("0.29 (-1 SD)", "0.5 (mean)", "0.71 (+1 SD)"),
values = friend.color.gradient, name = "% time chosen") +
scale_color_manual(labels = c("0.29 (-1 SD)", "0.5 (mean)", "0.71 (+1 SD)"),
values = friend.color.gradient, name = "% time chosen") +
xlab("Time") +
ylab("dyadic semantic similarity") +
theme_classic() +
theme(legend.position = "right",
panel.background = element_blank(),
axis.title.x = element_text(vjust = -0.4, size = 16),
axis.title.y = element_text(vjust = 1.5, size = 16),
axis.text = element_text(size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12))
friend <- last_plot()
# rival condition, x axis = time, y axis = similarity, group = percent chosen as friends
ggplot(rival.df, aes(x = x, y = predicted, color = group, fill = group)) +
geom_line(size = 1.5) +
geom_ribbon(alpha = .15,
aes(ymin = conf.low, ymax = conf.high),
linetype = .5) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 11)) +
scale_fill_manual(labels = c("0.29 (-1 SD)", "0.5 (mean)", "0.71 (+1 SD)"),
values = rival.color.gradient, name = "% time chosen") +
scale_color_manual(labels = c("0.29 (-1 SD)", "0.5 (mean)", "0.71 (+1 SD)"),
values = rival.color.gradient, name = "% time chosen") +
xlab("Time") +
ylab("dyadic semantic similarity") +
theme_classic() +
theme(legend.position = "right",
panel.background = element_blank(),
axis.title.x = element_text(vjust = -0.4, size = 16),
axis.title.y = element_text(vjust = 1.5, size = 16),
axis.text = element_text(size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12))
rival <- last_plot()
# plot together and save!
friend | rival
ggsave(filename = "Study3-multiplot.jpeg",
path = "/Volumes/GoogleDrive/My Drive/SANLab/Manuscripts/Survivor+Language/MarkdownFigures",
width = 10, height = 5,
units = c("in"))
ggsave(filename = "Study3-multiplot.pdf",
path = "/Volumes/GoogleDrive/My Drive/SANLab/Manuscripts/Survivor+Language/MarkdownFigures",
width = 10, height = 5,
units = c("in"))
# run multilevel model
model.wholeSeason <- lmer(similarity ~ PercentChosen * condType * Time + (1|PID) + (1|dyad),
data = corr)
# run main effect model (null)
null.wholeSeason <- lmer(similarity ~ PercentChosen + condType + Time + (1|dyad) + (1|PID),
data = corr)
anova(model.wholeSeason, null.wholeSeason)
# standardize similarity, PercentChosen, and Time
corr$similarityZ <- as.numeric(scale(corr$similarity))
corr$PercentChosenZ <- as.numeric(scale(corr$PercentChosen))
corr$TimeZ <- as.numeric(scale(corr$Time))
model.wholeSeason <- lmer(similarityZ ~ PercentChosenZ * condType * TimeZ + (1|PID) + (1|dyad),
data = corr)
summary(model.wholeSeason)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: similarityZ ~ PercentChosenZ * condType * TimeZ + (1 | PID) +
## (1 | dyad)
## Data: corr
##
## REML criterion at convergence: 44273.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1660659 -0.6079282 0.0316622 0.6800781 2.3621620
##
## Random effects:
## Groups Name Variance Std.Dev.
## PID (Intercept) 0.0000000 0.0000000
## dyad (Intercept) 0.3202370 0.5658948
## Residual 0.7997945 0.8943123
## Number of obs: 16872, groups: PID, 57; dyad, 21
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 1.190602e-01 1.240301e-01 2.001179e+01
## PercentChosenZ 1.782257e-04 1.099626e-02 1.686392e+04
## condTypeRival 1.032868e-03 1.410644e-02 1.684393e+04
## TimeZ 1.168886e-01 1.082302e-02 1.686386e+04
## PercentChosenZ:condTypeRival -1.383327e-03 1.759752e-02 1.685810e+04
## PercentChosenZ:TimeZ -5.348916e-02 9.583270e-03 1.685722e+04
## condTypeRival:TimeZ -6.194246e-04 1.414492e-02 1.684391e+04
## PercentChosenZ:condTypeRival:TimeZ 9.622101e-02 1.470222e-02 1.686228e+04
## t value Pr(>|t|)
## (Intercept) 0.95993 0.34855
## PercentChosenZ 0.01621 0.98707
## condTypeRival 0.07322 0.94163
## TimeZ 10.80000 < 2.22e-16 ***
## PercentChosenZ:condTypeRival -0.07861 0.93734
## PercentChosenZ:TimeZ -5.58151 2.4211e-08 ***
## condTypeRival:TimeZ -0.04379 0.96507
## PercentChosenZ:condTypeRival:TimeZ 6.54466 6.1349e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PrcnCZ cndTyR TimeZ PrCZ:TR PCZ:TZ cTR:TZ
## PercntChsnZ -0.006
## condTypeRvl -0.057 0.109
## TimeZ -0.020 0.049 0.026
## PrcntChZ:TR 0.000 -0.759 0.013 0.001
## PrcntChZ:TZ 0.018 -0.005 -0.074 -0.211 0.010
## cndTypRv:TZ 0.002 -0.070 -0.045 -0.646 -0.012 0.113
## PrcCZ:TR:TZ -0.017 0.012 -0.015 0.176 -0.027 -0.695 0.035
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(model.wholeSeason)
## 2.5 % 97.5 %
## .sig01 0.00000000000 0.01388203345
## .sig02 0.41803443945 0.77434741453
## .sigma 0.88466512063 0.90375845036
## (Intercept) -0.12919590997 0.36771686212
## PercentChosenZ -0.02140098501 0.02170157596
## condTypeRival -0.02660997551 0.02867792362
## TimeZ 0.09571124849 0.13813942844
## PercentChosenZ:condTypeRival -0.03581244351 0.03317447536
## PercentChosenZ:TimeZ -0.07229705221 -0.03473195395
## condTypeRival:TimeZ -0.02833841531 0.02710029720
## PercentChosenZ:condTypeRival:TimeZ 0.06745604762 0.12509409091
simple_slopes(model.wholeSeason)
null.wholeSeason <- lmer(similarityZ ~ PercentChosenZ + condType + TimeZ + (1|dyad) + (1|PID),
data = corr)
anova(model.wholeSeason, null.wholeSeason)
We took a Bayesian approach in order to establish that singularity would not undermine the validity of the models. Singularity typically occurs when there is limited variability within the random effects term(s) and/or the random effects structure is overly-complex relative to the variability offered by the data. Given the nature of our data, we opted to retain our random effects in all frequentist models after examining the same models with a Bayesian approach (using uninformative/default priors, which should, and do, yield the same beta estimates as a frequentist approach).
colors <- c("#2185c5ff","#ff9715ff","#f20253ff","#7ecefdff","#1c3aa9ff")
fr.colors <- c(colors[2], colors[3])
# 3 color gradient with friend and rival colors
friend.color.gradient <- c("#ffbf70",fr.colors[1],"#cc7000")
rival.color.gradient <- c("#fd4985",fr.colors[2],"#a20237")
library(bayesplot)
library(brms)
library(tidybayes)
library(interactions)
model1.wholeSeason.brms <- brm(data = corr,
family = gaussian,
similarity ~ PercentChosen * condType * Time + (1|PID) + (1|dyad),
iter = 4000, warmup = 1500, chains = 3, cores = 3,
control = list(adapt_delta = .999, max_treedepth = 15),
seed = 9)
summary(model1.wholeSeason.brms, waic = TRUE)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: similarity ~ PercentChosen * condType * Time + (1 | PID) + (1 | dyad)
## Data: corr (Number of observations: 16872)
## Draws: 3 chains, each with iter = 4000; warmup = 1500; thin = 1;
## total post-warmup draws = 7500
##
## Group-Level Effects:
## ~dyad (Number of levels: 21)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.08 0.01 0.06 0.11 1.00 1240 2211
##
## ~PID (Number of levels: 57)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.00 0.00 0.00 0.00 1.00 4718 2894
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept 0.48 0.02 0.44 0.52 1.01
## PercentChosen 0.07 0.01 0.04 0.10 1.00
## condTypeRival 0.06 0.01 0.04 0.09 1.00
## Time 0.01 0.00 0.01 0.01 1.00
## PercentChosen:condTypeRival -0.13 0.02 -0.17 -0.08 1.00
## PercentChosen:Time -0.01 0.00 -0.01 -0.01 1.00
## condTypeRival:Time -0.01 0.00 -0.01 -0.01 1.00
## PercentChosen:condTypeRival:Time 0.02 0.00 0.01 0.02 1.00
## Bulk_ESS Tail_ESS
## Intercept 629 1478
## PercentChosen 2560 3601
## condTypeRival 2209 3883
## Time 2985 4040
## PercentChosen:condTypeRival 2208 3376
## PercentChosen:Time 2873 3813
## condTypeRival:Time 2426 4054
## PercentChosen:condTypeRival:Time 2385 3865
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.12 0.00 0.12 0.12 1.00 12284 5003
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
conditional_effects(model1.wholeSeason.brms)
pairs(model1.wholeSeason.brms)
plot(model1.wholeSeason.brms)
interactions::interact_plot(model = model1.wholeSeason.brms,
pred = Time,
modx = PercentChosen,
mod2 = condType,
data = corr)
model1.wholeSeason.brms %>%
neff_ratio() %>%
mcmc_neff_hist(binwidth = .1) +
yaxis_text()
#main effect model for comparison
model1.wholeSeason.brms.main <- brm(data = corr,
family = gaussian,
similarity ~ PercentChosen + condType + Time + (1|PID) + (1|dyad),
iter = 4000, warmup = 1500, chains = 3, cores = 3,
control = list(adapt_delta = .999, max_treedepth = 15),
seed = 9)
#model comparison via loo
model1.wholeSeason.brms <- add_criterion(model1.wholeSeason.brms, "loo")
model1.wholeSeason.brms.main <- add_criterion(model1.wholeSeason.brms.main, "loo")
loo_compare(model1.wholeSeason.brms, model1.wholeSeason.brms.main) %>%
print(simplify = F)
## elpd_diff se_diff elpd_loo se_elpd_loo p_loo
## model1.wholeSeason.brms 0.0 0.0 12041.8 98.0 30.6
## model1.wholeSeason.brms.main -17.9 7.0 12023.8 97.6 26.5
## se_p_loo looic se_looic
## model1.wholeSeason.brms 0.4 -24083.5 196.0
## model1.wholeSeason.brms.main 0.4 -24047.6 195.2
#plot!
sim_int <- interactions::interact_plot(model = model1.wholeSeason.brms,
pred = Time,
modx = PercentChosen,
mod2 = condType,
data = corr,
interval = T)
sim_int <- as.data.frame(sim_int$data)
friend_int <- subset(sim_int, mod2_group == "condType = Friend")
rival_int <- subset(sim_int, mod2_group == "condType = Rival")
# friend plot
ggplot(friend_int, aes(x = Time, y = similarity, color = modx_group, fill = modx_group)) +
geom_line(size = 1.5) +
geom_ribbon(alpha = .15,
aes(ymin = ymin, ymax = ymax),
linetype = .5) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 11)) +
scale_fill_manual(labels = c("0.29 (-1 SD)", "0.5 (mean)", "0.71 (+1 SD)"),
values = friend.color.gradient, name = "% time chosen") +
scale_color_manual(labels = c("0.29 (-1 SD)", "0.5 (mean)", "0.71 (+1 SD)"),
values = friend.color.gradient, name = "% time chosen") +
xlab("Time") +
ylab("dyadic semantic similarity") +
theme_classic() +
theme(legend.position = "right",
panel.background = element_blank(),
axis.title.x = element_text(vjust = -0.4, size = 16),
axis.title.y = element_text(vjust = 1.5, size = 16),
axis.text = element_text(size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12))
friend <- last_plot()
# rival plot
ggplot(rival_int, aes(x = Time, y = similarity, color = modx_group, fill = modx_group)) +
geom_line(size = 1.5) +
geom_ribbon(alpha = .15,
aes(ymin = ymin, ymax = ymax),
linetype = .5) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 11)) +
scale_fill_manual(labels = c("0.29 (-1 SD)", "0.5 (mean)", "0.71 (+1 SD)"),
values = rival.color.gradient, name = "% time chosen") +
scale_color_manual(labels = c("0.29 (-1 SD)", "0.5 (mean)", "0.71 (+1 SD)"),
values = rival.color.gradient, name = "% time chosen") +
xlab("Time") +
ylab("dyadic semantic similarity") +
theme_classic() +
theme(legend.position = "right",
panel.background = element_blank(),
axis.title.x = element_text(vjust = -0.4, size = 16),
axis.title.y = element_text(vjust = 1.5, size = 16),
axis.text = element_text(size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12))
rival <- last_plot()
# plot together and save!
friend | rival
ggsave(filename = "Study3-bayes-multiplot.jpeg",
path = "/Volumes/GoogleDrive/My Drive/SANLab/Manuscripts/Survivor+Language/MarkdownFigures",
width = 10, height = 5,
units = c("in"))
ggsave(filename = "Study3-bayes-multiplot.pdf",
path = "/Volumes/GoogleDrive/My Drive/SANLab/Manuscripts/Survivor+Language/MarkdownFigures",
width = 10, height = 5,
units = c("in"))